Fly Balls Listed at all 30 MLB Ballparks

# ==============================================================================
# 1. SETUP & SEASON DATA FETCHING (CHUNKING)
# ==============================================================================
if (!require("pacman")) install.packages("pacman")
Loading required package: pacman
pacman::p_load(tidyverse, plotly, janitor, stringr)

# Function to fetch a specific date range
fetch_chunk <- function(start_date, end_date) {
  url <- paste0(
    "https://baseballsavant.mlb.com/statcast_search/csv",
    "?all=true&type=details",
    "&game_date_gt=", start_date,
    "&game_date_lt=", end_date,
    "&player_type=batter"
  )
  message("Downloading: ", start_date, " to ", end_date)
  tryCatch({
    read.csv(url, stringsAsFactors = FALSE) %>% janitor::clean_names() %>% as_tibble()
  }, error = function(e) { message("Error in chunk; skipping."); return(tibble()) })
}

# Master function to get the WHOLE 2024 SEASON
get_full_season_data <- function() {
  # Define monthly chunks to avoid hitting row limits/timeouts
  date_ranges <- list(
    c("2024-03-28", "2024-04-30"),
    c("2024-05-01", "2024-05-31"),
    c("2024-06-01", "2024-06-30"),
    c("2024-07-01", "2024-07-31"),
    c("2024-08-01", "2024-08-31"),
    c("2024-09-01", "2024-09-29")
  )
  
  # Loop and bind
  all_data <- map_dfr(date_ranges, ~fetch_chunk(.x[1], .x[2]))
  return(all_data)
}

# EXECUTE DOWNLOAD (This may take 1-2 minutes)
statcast_data <- get_full_season_data()
Downloading: 2024-03-28 to 2024-04-30
Downloading: 2024-05-01 to 2024-05-31
Downloading: 2024-06-01 to 2024-06-30
Downloading: 2024-07-01 to 2024-07-31
Downloading: 2024-08-01 to 2024-08-31
Downloading: 2024-09-01 to 2024-09-29
# ==============================================================================
# 2. STADIUM GEOMETRY DATABASE
# ==============================================================================
stadium_dims <- tibble(
  team = c("ARI","ATL","BAL","BOS","CHC","CWS","CIN","CLE","COL","DET",
           "HOU","KC","LAA","LAD","MIA","MIL","MIN","NYM","NYY","OAK",
           "PHI","PIT","SD","SEA","SF","STL","TB","TEX","TOR","WSH"),
  # Left, Left-Center, Center, Right-Center, Right
  d_lf = c(330, 335, 333, 310, 355, 330, 328, 325, 347, 342, 
           315, 330, 330, 330, 344, 344, 339, 335, 318, 330, 
           329, 325, 334, 331, 339, 336, 315, 329, 328, 336),
  d_lc = c(374, 385, 384, 379, 375, 375, 379, 370, 390, 370, 
           362, 385, 387, 375, 386, 371, 377, 370, 399, 375, 
           374, 383, 375, 378, 399, 375, 370, 372, 375, 377),
  d_cf = c(407, 400, 400, 390, 400, 400, 404, 400, 415, 420, 
           409, 410, 396, 395, 407, 400, 404, 408, 408, 400, 
           401, 399, 396, 401, 391, 400, 404, 408, 400, 402),
  d_rc = c(374, 375, 373, 420, 375, 375, 370, 375, 375, 365, 
           373, 385, 370, 375, 392, 374, 367, 370, 385, 375, 
           369, 375, 375, 381, 415, 375, 370, 374, 375, 370),
  d_rf = c(330, 325, 318, 302, 353, 335, 325, 325, 350, 330, 
           326, 330, 330, 330, 335, 345, 328, 330, 314, 330, 
           330, 320, 322, 326, 309, 335, 322, 326, 328, 335)
)

get_stadium_poly <- function(team_code) {
  s <- stadium_dims %>% filter(team == team_code)
  if(nrow(s) == 0) return(NULL)
  ref <- data.frame(
    angle = c(-45, -22.5, 0, 22.5, 45),
    dist  = c(s$d_lf, s$d_lc, s$d_cf, s$d_rc, s$d_rf)
  ) %>% mutate(rad=angle*pi/180, x=dist*sin(rad), y=dist*cos(rad))
  return(as.data.frame(spline(ref$x, ref$y, n = 100)))
}

# ==============================================================================
# 3. CLEANING & MODELING
# ==============================================================================
clean_data <- statcast_data %>%
  # Filter for relevant events
  filter(
    events %in% c("field_out", "single", "double", "triple", "home_run", "sac_fly"),
    !bb_type %in% c("ground_ball", "popup"), # Remove obvious infield plays
    !is.na(hit_distance_sc), !is.na(launch_speed), !is.na(launch_angle)
  ) %>%
  # Optional: Downsample if plot crashes (currently set to 100% data)
  sample_frac(1) %>% 
  mutate(
    fielder_raw = str_extract(des, "(?<=fielder\\s)[^.,]+"),
    fielder_name = str_trim(fielder_raw),
    is_caught = ifelse(events %in% c("field_out", "sac_fly"), 1, 0),
    
    spray_angle = atan((hc_x - 125.42) / (198.27 - hc_y)) * 180 / pi,
    raw_x = (hc_x - 125.42) * 2.5,
    raw_y = (198.27 - hc_y) * 2.5,
    angle_deg = atan2(raw_x, raw_y) * 180 / pi
  )

# Global Catch Model
catch_model <- glm(is_caught ~ launch_speed + launch_angle + hit_distance_sc + abs(spray_angle), 
                   data = clean_data, family = "binomial")
clean_data$predicted_prob <- predict(catch_model, clean_data, type = "response")

# ==============================================================================
# 4. BUILDING THE PLOT (WITH SNAPPING)
# ==============================================================================
teams_with_data <- sort(unique(clean_data$home_team))
teams_with_geom <- unique(stadium_dims$team)
valid_teams <- intersect(teams_with_data, teams_with_geom)

if(length(valid_teams) == 0) stop("No matching teams found in data.")

fig <- plot_ly()
infield <- data.frame(x = c(0, 63, 0, -63, 0), y = c(0, 63, 126, 63, 0))
visibility_list <- list()

for(i in seq_along(valid_teams)) {
  t <- valid_teams[i]
  
  # A. Geometry
  s_geom <- stadium_dims %>% filter(team == t)
  poly_t <- get_stadium_poly(t)
  
  # Fence Lookup Function
  ref_pts <- data.frame(
    angle = c(-45, -22.5, 0, 22.5, 45), 
    dist = c(s_geom$d_lf, s_geom$d_lc, s_geom$d_cf, s_geom$d_rc, s_geom$d_rf)
  )
  get_fence_dist <- approxfun(ref_pts$angle, ref_pts$dist, rule = 2)
  
  # B. Data Processing (Team Specific)
  data_t <- clean_data %>% 
    filter(home_team == t) %>%
    mutate(
      catch_prob = ifelse(events == "home_run", 0, predicted_prob),
      
      # 1. Snap Foul Balls
      is_fair_hit = events %in% c("single", "double", "triple"),
      fixed_angle = case_when(
        is_fair_hit & angle_deg > 45 ~ 44,
        is_fair_hit & angle_deg < -45 ~ -44,
        TRUE ~ angle_deg
      ),
      
      # 2. Snap Fake Homers
      current_dist = sqrt(raw_x^2 + raw_y^2),
      max_dist = get_fence_dist(fixed_angle),
      final_dist = case_when(
        events != "home_run" & current_dist >= max_dist ~ max_dist - 5,
        TRUE ~ current_dist
      ),
      
      coord_x = final_dist * sin(fixed_angle * pi / 180),
      coord_y = final_dist * cos(fixed_angle * pi / 180),
      
      # 3. Hover Info
      display_fielder = ifelse(is_caught == 1 & !is.na(fielder_name), fielder_name, "N/A"),
      hover_txt = paste0(
        "<b>Batter:</b> ", player_name, "<br>",
        "<b>Event:</b> ", events, "<br>",
        "<b>Caught By:</b> ", display_fielder, "<br>",
        "------------------<br>",
        "<b>Exit Velo:</b> ", round(launch_speed, 1), " mph<br>",
        "<b>Launch Angle:</b> ", round(launch_angle, 1), "°<br>",
        "<b>Distance:</b> ", round(hit_distance_sc, 0), " ft<br>",
        "<b>Catch Prob:</b> ", round(catch_prob * 100, 1), "%"
      )
    ) %>%
    # Filter out routine short fly balls (< 90% catch prob) to keep plot performant
    filter(coord_y > 50, catch_prob < 0.90)
  
  if(nrow(data_t) == 0) data_t <- data.frame(coord_x=0, coord_y=0, hover_txt="", catch_prob=0)[0,]

  is_visible <- (i == 1)
  
  # C. Add Traces
  # Trace 1: Grass
  fig <- fig %>% add_polygons(
    x = c(0, poly_t$x, 0), y = c(0, poly_t$y, 0),
    fillcolor = "#35682d", opacity = 0.8, line = list(color = "white", width = 2),
    name = paste(t, "Field"), visible = is_visible, hoverinfo = "skip"
  )
  
  # Trace 2: Infield
  fig <- fig %>% add_polygons(
    data = infield, x = ~x, y = ~y,
    fillcolor = "#8b4513", line = list(color = "black"),
    name = "Infield", visible = is_visible, hoverinfo = "skip"
  )
  
  # Trace 3: Data Points
  fig <- fig %>% add_markers(
    x = data_t$coord_x, 
    y = data_t$coord_y,
    text = data_t$hover_txt,
    hoverinfo = "text",
    marker = list(
      size = 5, # Slightly smaller dots for dense data
      line = list(color = "white", width = 0.2),
      color = data_t$catch_prob,
      colorscale = list(c(0, "green"), c(0.5, "yellow"), c(1, "red")),
      cmin = 0, cmax = 1,
      colorbar = list(title = "Catch Prob")
    ),
    name = paste(t, "Balls"), visible = is_visible
  )
  
  vis_vec <- rep(FALSE, length(valid_teams) * 3)
  vis_vec[((i-1)*3 + 1):((i-1)*3 + 3)] <- TRUE
  visibility_list[[i]] <- list(method = "restyle", args = list("visible", vis_vec), label = t)
}

fig <- fig %>% layout(
  title = "2024 MLB Fly Balls (Full Season)",
  xaxis = list(visible = FALSE, range = c(-250, 250), fixedrange=TRUE),
  yaxis = list(visible = FALSE, range = c(0, 480), fixedrange=TRUE),
  showlegend = FALSE,
  plot_bgcolor = "#f0f0f0",
  updatemenus = list(list(y = 1.1, x = 0.1, buttons = visibility_list))
)

fig